{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Resolving conflicts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There is currently no in-built support for resolving conflicts within a table using petl, however this notebook gives an example of a workaround strategy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "sys.version_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.0.6'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import petl as etl\n",
    "etl.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class='petl'>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>id</th>\n",
       "<th>name</th>\n",
       "<th>value</th>\n",
       "<th>age</th>\n",
       "<th>master_age</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>1</td>\n",
       "<td>Tressa</td>\n",
       "<td>1203</td>\n",
       "<td>42</td>\n",
       "<td>42</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>2</td>\n",
       "<td>Phil</td>\n",
       "<td>23997</td>\n",
       "<td>None</td>\n",
       "<td>None</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>3</td>\n",
       "<td>Darius</td>\n",
       "<td>None</td>\n",
       "<td>78</td>\n",
       "<td>78</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>4</td>\n",
       "<td>Delinda</td>\n",
       "<td>96501</td>\n",
       "<td>64</td>\n",
       "<td>64</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>5</td>\n",
       "<td>Adelina</td>\n",
       "<td>96508</td>\n",
       "<td>50</td>\n",
       "<td>50</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "+-----+-----------+---------+------+------------+\n",
       "| id  | name      | value   | age  | master_age |\n",
       "+=====+===========+=========+======+============+\n",
       "| '1' | 'Tressa'  | '1203'  | '42' | '42'       |\n",
       "+-----+-----------+---------+------+------------+\n",
       "| '2' | 'Phil'    | '23997' | None | None       |\n",
       "+-----+-----------+---------+------+------------+\n",
       "| '3' | 'Darius'  | None    | '78' | '78'       |\n",
       "+-----+-----------+---------+------+------------+\n",
       "| '4' | 'Delinda' | '96501' | '64' | '64'       |\n",
       "+-----+-----------+---------+------+------------+\n",
       "| '5' | 'Adelina' | '96508' | '50' | '50'       |\n",
       "+-----+-----------+---------+------+------------+"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_master = b\"\"\"id  name              value            age\n",
    " 1  Tressa           1203              42\n",
    " 2  Phil                23997            \n",
    " 3  Darius                .                 78\n",
    " 4  Delinda          96501            64\n",
    " 5  Adelina          96508             50\n",
    "\"\"\"\n",
    "tbl_master = (\n",
    "    etl\n",
    "    .fromtext(etl.MemorySource(data_master))\n",
    "    .split('lines', r'\\s+')\n",
    "    .skip(1)\n",
    "    .replaceall('.', None)\n",
    "    .addfield('master_age', lambda row: row.age)\n",
    ")\n",
    "tbl_master"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class='petl'>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>id</th>\n",
       "<th>name</th>\n",
       "<th>value</th>\n",
       "<th>age</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>2</td>\n",
       "<td>Phil</td>\n",
       "<td>None</td>\n",
       "<td>53</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>3</td>\n",
       "<td>Darius</td>\n",
       "<td>5000</td>\n",
       "<td>76</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "+-----+----------+--------+------+\n",
       "| id  | name     | value  | age  |\n",
       "+=====+==========+========+======+\n",
       "| '2' | 'Phil'   | None   | '53' |\n",
       "+-----+----------+--------+------+\n",
       "| '3' | 'Darius' | '5000' | '76' |\n",
       "+-----+----------+--------+------+"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_other = b\"\"\"id  name                value            age\n",
    " 2  Phil                 .                    53\n",
    " 3  Darius           5000             76\n",
    "\"\"\"\n",
    "tbl_other = (\n",
    "    etl\n",
    "    .fromtext(etl.MemorySource(data_other))\n",
    "    .split('lines', r'\\s+')\n",
    "    .skip(1)\n",
    "    .replaceall('.', None)\n",
    ")\n",
    "tbl_other"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class='petl'>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>id</th>\n",
       "<th>name</th>\n",
       "<th>value</th>\n",
       "<th>age</th>\n",
       "<th>master_age</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>1</td>\n",
       "<td>Tressa</td>\n",
       "<td>1203</td>\n",
       "<td>42</td>\n",
       "<td>42</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>2</td>\n",
       "<td>Phil</td>\n",
       "<td>23997</td>\n",
       "<td>53</td>\n",
       "<td>None</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>3</td>\n",
       "<td>Darius</td>\n",
       "<td>5000</td>\n",
       "<td>Conflict({'76', '78'})</td>\n",
       "<td>78</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>4</td>\n",
       "<td>Delinda</td>\n",
       "<td>96501</td>\n",
       "<td>64</td>\n",
       "<td>64</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>5</td>\n",
       "<td>Adelina</td>\n",
       "<td>96508</td>\n",
       "<td>50</td>\n",
       "<td>50</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "+-----+-----------+---------+------------------------+------------+\n",
       "| id  | name      | value   | age                    | master_age |\n",
       "+=====+===========+=========+========================+============+\n",
       "| '1' | 'Tressa'  | '1203'  | '42'                   | '42'       |\n",
       "+-----+-----------+---------+------------------------+------------+\n",
       "| '2' | 'Phil'    | '23997' | '53'                   | None       |\n",
       "+-----+-----------+---------+------------------------+------------+\n",
       "| '3' | 'Darius'  | '5000'  | Conflict({'76', '78'}) | '78'       |\n",
       "+-----+-----------+---------+------------------------+------------+\n",
       "| '4' | 'Delinda' | '96501' | '64'                   | '64'       |\n",
       "+-----+-----------+---------+------------------------+------------+\n",
       "| '5' | 'Adelina' | '96508' | '50'                   | '50'       |\n",
       "+-----+-----------+---------+------------------------+------------+"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tbl_merge = etl.merge(tbl_master, tbl_other, key='id')\n",
    "tbl_merge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class='petl'>\n",
       "<thead>\n",
       "<tr>\n",
       "<th>id</th>\n",
       "<th>name</th>\n",
       "<th>value</th>\n",
       "<th>age</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>1</td>\n",
       "<td>Tressa</td>\n",
       "<td>1203</td>\n",
       "<td>42</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>2</td>\n",
       "<td>Phil</td>\n",
       "<td>23997</td>\n",
       "<td>53</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>3</td>\n",
       "<td>Darius</td>\n",
       "<td>5000</td>\n",
       "<td>78</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>4</td>\n",
       "<td>Delinda</td>\n",
       "<td>96501</td>\n",
       "<td>64</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>5</td>\n",
       "<td>Adelina</td>\n",
       "<td>96508</td>\n",
       "<td>50</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "+-----+-----------+---------+------+\n",
       "| id  | name      | value   | age  |\n",
       "+=====+===========+=========+======+\n",
       "| '1' | 'Tressa'  | '1203'  | '42' |\n",
       "+-----+-----------+---------+------+\n",
       "| '2' | 'Phil'    | '23997' | '53' |\n",
       "+-----+-----------+---------+------+\n",
       "| '3' | 'Darius'  | '5000'  | '78' |\n",
       "+-----+-----------+---------+------+\n",
       "| '4' | 'Delinda' | '96501' | '64' |\n",
       "+-----+-----------+---------+------+\n",
       "| '5' | 'Adelina' | '96508' | '50' |\n",
       "+-----+-----------+---------+------+"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tbl_merge_resolved = (\n",
    "    tbl_merge\n",
    "    .convert('age', lambda v, row: (row.master_age if isinstance(v, etl.Conflict) else v),\n",
    "             pass_row=True)\n",
    "    .cutout('master_age')\n",
    ")\n",
    "tbl_merge_resolved"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
